1 Showcasing RRPlots

1.0.1 Libraries

library(survival)
library(FRESA.CAD)
## Loading required package: Rcpp
## Loading required package: stringr
## Loading required package: miscTools
## Loading required package: Hmisc
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## Loading required package: pROC
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
#source("~/GitHub/FRESA.CAD/R/RRPlot.R")
#source("~/GitHub/FRESA.CAD/R/PoissonEventRiskCalibration.R")
op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
#pander::panderOptions('table.split.table', 400)
pander::panderOptions('keep.trailing.zeros',TRUE)
layout(matrix(1:1, nrow=1))

1.0.2 Wisconsin Data Set

dataBreast <- read.csv("~/GitHub/RISKPLOTS/DATA/wpbc.data", header=FALSE)
table(dataBreast$V2)
## 
##   N   R 
## 151  47
rownames(dataBreast) <- dataBreast$V1
dataBreast$V1 <- NULL
dataBreast$status <- 1*(dataBreast$V2=="R")
dataBreast$V2 <- NULL
dataBreast$time <- dataBreast$V3
dataBreast$V3 <- NULL
dataBreast <- sapply(dataBreast,as.numeric)
## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
dataBreast <- as.data.frame(dataBreast[complete.cases(dataBreast),])
table(dataBreast$status)
## 
##   0   1 
## 148  46

1.1 Exploring Raw Features with RRPlot

convar <- colnames(dataBreast)[lapply(apply(dataBreast,2,unique),length) > 10]
convar <- convar[convar != "time"]
topvar <- univariate_BinEnsemble(dataBreast[,c("status",convar)],"status")
pander::pander(topvar)
V35 V24 V34 V7 V16 V14 V17
0.0261 0.0261 0.0261 0.0623 0.126 0.126 0.126
topv <- min(5,length(topvar))
topFive <- names(topvar)[1:topv]
RRanalysis <- list();
idx <- 1
topf <- topFive[1]
for (topf in topFive)
{
  RRanalysis[[idx]] <- RRPlot(cbind(dataBreast$status,dataBreast[,topf]),
                              atProb=c(0.90,0.80),
                  timetoEvent=dataBreast$time,
                  title=topf,
#                  plotRR=FALSE
                  )
  idx <- idx + 1
}

names(RRanalysis) <- topFive

1.2 Reporting the Metrics

pander::pander(RRanalysis[[1]]$keyPoints,caption=topFive[1])
V35
  Thr RR SEN SPE BACC
@:0.9 1.00e+01 1.33 0.152 0.8919 0.522
@:0.8 3.00e+00 2.50 0.478 0.7973 0.638
@MAX_BACC 1.00e+00 2.43 0.630 0.6554 0.643
@MAX_RR -2.44e-09 6.35 0.978 0.1554 0.567
@SPE100 -6.76e-09 22.38 1.000 0.0608 0.530
pander::pander(RRanalysis[[2]]$keyPoints,caption=topFive[2])
V24
  Thr RR SEN SPE BACC
@:0.9 25.4 1.94 0.239 0.8919 0.566
@:0.8 24.0 1.72 0.348 0.7973 0.573
@MAX_BACC 20.3 2.45 0.739 0.5270 0.633
@MAX_RR 16.6 3.87 0.957 0.1824 0.569
@SPE100 15.5 30.33 1.000 0.0811 0.541
RRanalysis[[2]]$keyPoints["@MAX_BACC",c("BACC","RR")]
           BACC       RR

@MAX_BACC 0.6330787 2.451923

ROCAUC <- NULL
CstatCI <- NULL
RRatios <- NULL
LogRangp <- NULL
Sensitivity <- NULL
Specificity <- NULL
MAXBACC <- NULL

for (topf in topFive)
{
  CstatCI <- rbind(CstatCI,RRanalysis[[topf]]$c.index$cstatCI)
  RRatios <- rbind(RRatios,RRanalysis[[topf]]$RR_atP)
  LogRangp <- rbind(LogRangp,RRanalysis[[topf]]$surdif$pvalue)
  Sensitivity <- rbind(Sensitivity,RRanalysis[[topf]]$ROCAnalysis$sensitivity)
  Specificity <- rbind(Specificity,RRanalysis[[topf]]$ROCAnalysis$specificity)
  ROCAUC <- rbind(ROCAUC,RRanalysis[[topf]]$ROCAnalysis$aucs)
  MAXBACC <- rbind(MAXBACC,RRanalysis[[topf]]$keyPoints["@MAX_BACC",c("BACC")])
}
rownames(CstatCI) <- topFive
rownames(RRatios) <- topFive
rownames(LogRangp) <- topFive
rownames(Sensitivity) <- topFive
rownames(Specificity) <- topFive
rownames(ROCAUC) <- topFive
rownames(MAXBACC) <- topFive

pander::pander(ROCAUC)
  est lower upper
V35 0.660 0.572 0.748
V24 0.633 0.542 0.724
V34 0.660 0.574 0.746
V7 0.610 0.515 0.705
V16 0.598 0.504 0.692
pander::pander(CstatCI)
  mean.C Index median lower upper
V35 0.642 0.643 0.554 0.730
V24 0.677 0.675 0.597 0.756
V34 0.658 0.659 0.587 0.728
V7 0.666 0.666 0.584 0.743
V16 0.614 0.616 0.527 0.699
pander::pander(RRatios)
  est lower upper
V35 1.37 0.698 2.69
V24 1.93 1.122 3.31
V34 1.44 0.741 2.82
V7 1.37 0.700 2.69
V16 1.04 0.462 2.32
pander::pander(LogRangp)
V35 9.35e-05
V24 9.38e-03
V34 2.37e-02
V7 7.33e-02
V16 2.13e-02
pander::pander(Sensitivity)
  est lower upper
V35 0.152 0.0634 0.289
V24 0.239 0.1259 0.388
V34 0.152 0.0634 0.289
V7 0.152 0.0634 0.289
V16 0.109 0.0362 0.236
pander::pander(Specificity)
  est lower upper
V35 0.899 0.838 0.942
V24 0.899 0.838 0.942
V34 0.899 0.838 0.942
V7 0.899 0.838 0.942
V16 0.899 0.838 0.942
pander::pander(MAXBACC)
V35 0.643
V24 0.633
V34 0.640
V7 0.621
V16 0.614
meanMatrix <- cbind(ROCAUC[,1],CstatCI[,1],Sensitivity[,1],Specificity[,1],RRatios[,1],MAXBACC)
colnames(meanMatrix) <- c("ROCAUC","C-Stat","Sen","Spe","RR","MAX_BACC")
pander::pander(meanMatrix)
  ROCAUC C-Stat Sen Spe RR MAX_BACC
V35 0.660 0.642 0.152 0.899 1.37 0.643
V24 0.633 0.677 0.239 0.899 1.93 0.633
V34 0.660 0.658 0.152 0.899 1.44 0.640
V7 0.610 0.666 0.152 0.899 1.37 0.621
V16 0.598 0.614 0.109 0.899 1.04 0.614

1.3 Modeling

ml <- BSWiMS.model(Surv(time,status)~1,data=dataBreast,NumberofRepeats = 10)

[++++++++++++++++++++++++++++++++++++++++++++++++++++++++++]…..

sm <- summary(ml)
pander::pander(sm$coefficients)
Table continues below
  Estimate lower HR upper u.Accuracy r.Accuracy
V24 5.23e-02 1.02 1.05 1.09 0.598 0.237
V27 2.45e-04 1.00 1.00 1.00 0.608 0.241
V26 3.99e-03 1.00 1.00 1.01 0.593 0.336
V34 1.18e-02 1.00 1.01 1.02 0.634 0.277
V7 6.74e-08 1.00 1.00 1.00 0.588 0.237
V35 2.21e-03 1.00 1.00 1.00 0.727 0.592
V6 2.88e-07 1.00 1.00 1.00 0.577 0.237
Table continues below
  full.Accuracy u.AUC r.AUC full.AUC IDI NRI z.IDI
V24 0.598 0.609 0.500 0.609 0.0619 0.437 2.87
V27 0.608 0.608 0.501 0.608 0.0563 0.433 2.76
V26 0.598 0.598 0.529 0.602 0.0622 0.398 2.75
V34 0.631 0.618 0.511 0.617 0.0312 0.466 2.40
V7 0.588 0.595 0.500 0.595 0.0487 0.380 2.30
V35 0.619 0.641 0.598 0.615 0.0275 0.551 2.24
V6 0.577 0.588 0.500 0.588 0.0459 0.353 2.19
  z.NRI Delta.AUC Frequency
V24 2.67 0.1091 1.0
V27 2.63 0.1071 1.0
V26 2.41 0.0731 1.0
V34 2.82 0.1055 1.0
V7 2.30 0.0949 1.0
V35 3.41 0.0171 1.0
V6 2.13 0.0881 0.3

1.4 Cox Model Performance

Here we evaluate the model using the RRPlot() function.

1.4.1 The evaluation of the raw Cox model with RRPlot()

Here we will use the predicted event probability assuming a baseline hazard for events withing 5 years

index <- predict(ml,dataBreast)
timeinterval <- 2*mean(subset(dataBreast,status==1)$time)

h0 <- sum(dataBreast$status & dataBreast$time <= timeinterval)
h0 <- h0/sum((dataBreast$time > timeinterval) | (dataBreast$status==1))
pander::pander(t(c(h0=h0,timeinterval=timeinterval)),caption="Initial Parameters")
Initial Parameters
h0 timeinterval
0.323 51.1
rdata <- cbind(dataBreast$status,ppoisGzero(index,h0))
rownames(rdata) <- rownames(dataBreast)

rrAnalysisTrain <- RRPlot(rdata,atProb=c(0.90,0.80),
                     timetoEvent=dataBreast$time,
                     title="Raw Train: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

1.4.2 Uncalibrated Performance Report

pander::pander(t(rrAnalysisTrain$OERatio),caption="O/E Ratio")
O/E Ratio
est lower upper
0.844 0.618 1.13
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Ratio")
O/E Ratio
mean 50% 2.5% 97.5%
1.02 1.02 0.965 1.07
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Ratio")
O/Acum Ratio
mean 50% 2.5% 97.5%
0.798 0.798 0.79 0.807
pander::pander(rrAnalysisTrain$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.682 0.683 0.597 0.759
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.64 0.549 0.731
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.261 0.143 0.411
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.899 0.838 0.942
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% 80% at_max_BACC at_max_RR atSPE100
0.419 0.36 0.253 0.178 0.161
pander::pander(t(rrAnalysisTrain$RR_atP),caption="Risk Ratio")
Risk Ratio
est lower upper
1.81 1.06 3.09
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 12.346960 on 2 degrees of freedom, p = 0.002084
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 147 29 36.99 1.725 8.974
class=1 20 5 4.11 0.193 0.216
class=2 27 12 4.90 10.269 11.609